Fit Initial BG/NBD Models

1 Load Datasets

We start by re-loading the data required for these models.

1.1 Load Pre-processed Transactional Data

retail_transaction_data_tbl <- read_rds("data/retail_data_cleaned_tbl.rds")
retail_transaction_data_tbl %>% glimpse()
## Rows: 1,021,424
## Columns: 23
## $ row_id            <chr> "ROW0000001", "ROW0000002", "ROW0000003", "ROW000000…
## $ excel_sheet       <chr> "Year 2009-2010", "Year 2009-2010", "Year 2009-2010"…
## $ invoice_id        <chr> "489434", "489434", "489434", "489434", "489434", "4…
## $ stock_code        <chr> "85048", "79323P", "79323W", "22041", "21232", "2206…
## $ description       <chr> "15CM CHRISTMAS GLASS BALL 20 LIGHTS", "PINK CHERRY …
## $ quantity          <dbl> 12, 12, 12, 48, 24, 24, 24, 10, 12, 12, 24, 12, 10, …
## $ invoice_date      <date> 2009-12-01, 2009-12-01, 2009-12-01, 2009-12-01, 200…
## $ price             <dbl> 6.95, 6.75, 6.75, 2.10, 1.25, 1.65, 1.25, 5.95, 2.55…
## $ customer_id       <chr> "13085", "13085", "13085", "13085", "13085", "13085"…
## $ country           <chr> "United Kingdom", "United Kingdom", "United Kingdom"…
## $ stock_code_upr    <chr> "85048", "79323P", "79323W", "22041", "21232", "2206…
## $ cancellation      <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ invoice_dttm      <dttm> 2009-12-01 07:45:00, 2009-12-01 07:45:00, 2009-12-0…
## $ invoice_month     <chr> "December", "December", "December", "December", "Dec…
## $ invoice_dow       <chr> "Tuesday", "Tuesday", "Tuesday", "Tuesday", "Tuesday…
## $ invoice_dom       <chr> "01", "01", "01", "01", "01", "01", "01", "01", "01"…
## $ invoice_hour      <chr> "07", "07", "07", "07", "07", "07", "07", "07", "07"…
## $ invoice_minute    <chr> "45", "45", "45", "45", "45", "45", "45", "45", "45"…
## $ invoice_woy       <chr> "49", "49", "49", "49", "49", "49", "49", "49", "49"…
## $ invoice_ym        <chr> "200912", "200912", "200912", "200912", "200912", "2…
## $ stock_value       <dbl> 83.40, 81.00, 81.00, 100.80, 30.00, 39.60, 30.00, 59…
## $ invoice_monthprop <dbl> 0.04347826, 0.04347826, 0.04347826, 0.04347826, 0.04…
## $ exclude           <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…

We also want to load the customer transactional data.

customer_transactions_tbl <- read_rds("data/customer_transactions_tbl.rds")
customer_transactions_tbl %>% glimpse()
## Rows: 37,031
## Columns: 4
## $ tnx_timestamp <dttm> 2009-12-01 07:45:00, 2009-12-01 07:45:59, 2009-12-01 09…
## $ customer_id   <chr> "13085", "13085", "13078", "15362", "18102", "12682", "1…
## $ invoice_id    <chr> "489434", "489435", "489436", "489437", "489438", "48943…
## $ total_spend   <dbl> 505.30, 145.80, 630.33, 310.75, 2286.24, 426.30, 50.40, …

Finally, we want to load the various datasets used to fit the P/NBD models

btyd_fitdata_tbl <- read_rds("data/btyd_fitdata_tbl.rds")

btyd_fitdata_tbl %>% glimpse()
## Rows: 4,716
## Columns: 6
## $ customer_id    <chr> "12346", "12347", "12348", "12349", "12350", "12351", "…
## $ first_tnx_date <dttm> 2009-12-14 08:34:00, 2010-10-31 14:19:59, 2010-09-27 1…
## $ last_tnx_date  <dttm> 2011-01-18 10:00:59, 2011-01-26 14:29:59, 2011-01-25 1…
## $ x              <dbl> 11, 2, 2, 2, 0, 0, 6, 0, 0, 3, 1, 2, 7, 4, 3, 1, 1, 0, …
## $ t_x            <dbl> 57.151488095, 12.429563492, 17.117361111, 25.970535714,…
## $ T_cal          <dbl> 67.520437, 21.628968, 26.482242, 48.063492, 8.190377, 1…
btyd_obs_stats_tbl <- read_rds("data/btyd_obs_stats_tbl.rds")

btyd_obs_stats_tbl %>% glimpse()
## Rows: 3,849
## Columns: 4
## $ customer_id <chr> "12347", "12348", "12349", "12352", "12353", "12354", "123…
## $ tnx_count   <int> 5, 2, 1, 3, 1, 1, 1, 2, 1, 2, 2, 3, 9, 2, 4, 1, 1, 2, 2, 1…
## $ first_tnx   <dttm> 2011-04-07 10:43:00, 2011-04-05 10:47:00, 2011-11-21 09:5…
## $ last_tnx    <dttm> 2011-12-07 15:51:59, 2011-09-25 13:13:00, 2011-11-21 09:5…
extreme_customers_tbl <- read_rds("data/extreme_customers_tbl.rds")

extreme_customers_tbl %>% glimpse()
## Rows: 1,637
## Columns: 6
## $ customer_id    <chr> "12350", "12351", "12353", "12355", "12357", "12365", "…
## $ first_tnx_date <dttm> 2011-02-02 16:00:59, 2010-11-29 15:23:00, 2010-10-27 1…
## $ last_tnx_date  <dttm> 2011-02-02 16:00:59, 2010-11-29 15:23:00, 2010-10-27 1…
## $ x              <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ t_x            <dbl> 0.000000000, 0.000000000, 0.000000000, 0.000000000, 0.0…
## $ T_cal          <dbl> 8.190377, 17.479861, 22.209921, 44.928671, 19.368552, 5…

1.2 Construct Customer Cohorts

Later in this worksheet we will want to create customer cohorts based on the date of the first transaction for each customer.

customer_cohortdata_tbl <- customer_transactions_tbl %>%
  group_by(customer_id) %>%
  summarise(
    .groups = "drop",
    
    first_tnx_date = min(tnx_timestamp)
    ) %>%
  mutate(
    cohort_qtr = first_tnx_date %>% as.yearqtr(),
    cohort_ym  = first_tnx_date %>% format("%Y %m"),
    
    .after = "customer_id"
    )

customer_cohortdata_tbl %>% glimpse()
## Rows: 5,878
## Columns: 4
## $ customer_id    <chr> "12346", "12347", "12348", "12349", "12350", "12351", "…
## $ cohort_qtr     <yearqtr> 2009 Q4, 2010 Q4, 2010 Q3, 2010 Q2, 2011 Q1, 2010 Q…
## $ cohort_ym      <chr> "2009 12", "2010 10", "2010 09", "2010 04", "2011 02", …
## $ first_tnx_date <dttm> 2009-12-14 08:34:00, 2010-10-31 14:19:59, 2010-09-27 1…

2 Fit Initial BG/NBD Model

stan_modeldir <- "stan_models"
stan_codedir  <-   "stan_code"
## functions {
##   #include util_functions.stan
## }
## 
## data {
##   int<lower=1> n;           // number of customers
## 
##   vector<lower=0>[n] t_x;   // time to most recent purchase
##   vector<lower=0>[n] T_cal; // total observation time
##   vector<lower=0>[n] x;     // number of purchases observed
## 
##   real<lower=0> lambda_mn;  // prior mean for lambda
##   real<lower=0> lambda_cv;  // prior cv   for lambda
## 
##   real<lower=0,upper=1> p_mn; // prior mean     for p
##   real<lower=0>         p_k;  // prior strength for p
## }
## 
## 
## transformed data {
##   real<lower=0> r     = 1 / (lambda_cv * lambda_cv);
##   real<lower=0> alpha = 1 / (lambda_cv * lambda_cv * lambda_mn);
## 
##   real<lower=0> a     = p_k * p_mn;
##   real<lower=0> b     = p_k * (1 - p_mn);
## }
## 
## 
## parameters {
##   vector<lower=0>[n]         lambda;  // purchase rate
##   vector<lower=0,upper=1>[n] p;       // dropout probabilit
## }
## 
## 
## model {
##   // setting priors
##   lambda ~ gamma(r, alpha);
##   p      ~ beta (a, b);
## 
##   target += calculate_bgnbd_loglik(n, lambda, p, x, t_x, T_cal);
## }

We now compile this model using CmdStanR.

bgnbd_fixed_stanmodel <- cmdstan_model(
  "stan_code/bgnbd_fixed.stan",
  include_paths =   stan_codedir,
  pedantic      =           TRUE,
  dir           =  stan_modeldir
  )

2.1 Fit the Model

We then use this compiled model with our data to produce a fit of the data.

stan_modelname <- "bgnbd_fixed"
stanfit_prefix <- str_c("fit_", stan_modelname) 

stan_data_lst <- btyd_fitdata_tbl %>%
  select(customer_id, x, t_x, T_cal) %>%
  compose_data(
    lambda_mn = 0.25,
    lambda_cv = 1.00,
    
    p_mn      =  0.10,
    p_k       = 10.00,
    )

bgnbd_fixed_stanfit <- bgnbd_fixed_stanmodel$sample(
  data            =                stan_data_lst,
  chains          =                            4,
  iter_warmup     =                          500,
  iter_sampling   =                          500,
  seed            =                         4201,
  save_warmup     =                         TRUE,
  output_dir      =                stan_modeldir,
  output_basename =               stanfit_prefix,
  )
## Running MCMC with 4 chains, at most 8 in parallel...
## 
## Chain 1 Iteration:   1 / 1000 [  0%]  (Warmup) 
## Chain 2 Iteration:   1 / 1000 [  0%]  (Warmup) 
## Chain 3 Iteration:   1 / 1000 [  0%]  (Warmup) 
## Chain 4 Iteration:   1 / 1000 [  0%]  (Warmup) 
## Chain 2 Iteration: 100 / 1000 [ 10%]  (Warmup) 
## Chain 3 Iteration: 100 / 1000 [ 10%]  (Warmup) 
## Chain 1 Iteration: 100 / 1000 [ 10%]  (Warmup) 
## Chain 4 Iteration: 100 / 1000 [ 10%]  (Warmup) 
## Chain 2 Iteration: 200 / 1000 [ 20%]  (Warmup) 
## Chain 3 Iteration: 200 / 1000 [ 20%]  (Warmup) 
## Chain 1 Iteration: 200 / 1000 [ 20%]  (Warmup) 
## Chain 4 Iteration: 200 / 1000 [ 20%]  (Warmup) 
## Chain 2 Iteration: 300 / 1000 [ 30%]  (Warmup) 
## Chain 3 Iteration: 300 / 1000 [ 30%]  (Warmup) 
## Chain 1 Iteration: 300 / 1000 [ 30%]  (Warmup) 
## Chain 4 Iteration: 300 / 1000 [ 30%]  (Warmup) 
## Chain 2 Iteration: 400 / 1000 [ 40%]  (Warmup) 
## Chain 3 Iteration: 400 / 1000 [ 40%]  (Warmup) 
## Chain 1 Iteration: 400 / 1000 [ 40%]  (Warmup) 
## Chain 4 Iteration: 400 / 1000 [ 40%]  (Warmup) 
## Chain 2 Iteration: 500 / 1000 [ 50%]  (Warmup) 
## Chain 2 Iteration: 501 / 1000 [ 50%]  (Sampling) 
## Chain 3 Iteration: 500 / 1000 [ 50%]  (Warmup) 
## Chain 3 Iteration: 501 / 1000 [ 50%]  (Sampling) 
## Chain 1 Iteration: 500 / 1000 [ 50%]  (Warmup) 
## Chain 1 Iteration: 501 / 1000 [ 50%]  (Sampling) 
## Chain 4 Iteration: 500 / 1000 [ 50%]  (Warmup) 
## Chain 4 Iteration: 501 / 1000 [ 50%]  (Sampling) 
## Chain 2 Iteration: 600 / 1000 [ 60%]  (Sampling) 
## Chain 3 Iteration: 600 / 1000 [ 60%]  (Sampling) 
## Chain 1 Iteration: 600 / 1000 [ 60%]  (Sampling) 
## Chain 4 Iteration: 600 / 1000 [ 60%]  (Sampling) 
## Chain 2 Iteration: 700 / 1000 [ 70%]  (Sampling) 
## Chain 3 Iteration: 700 / 1000 [ 70%]  (Sampling) 
## Chain 1 Iteration: 700 / 1000 [ 70%]  (Sampling) 
## Chain 4 Iteration: 700 / 1000 [ 70%]  (Sampling) 
## Chain 2 Iteration: 800 / 1000 [ 80%]  (Sampling) 
## Chain 3 Iteration: 800 / 1000 [ 80%]  (Sampling) 
## Chain 1 Iteration: 800 / 1000 [ 80%]  (Sampling) 
## Chain 4 Iteration: 800 / 1000 [ 80%]  (Sampling) 
## Chain 2 Iteration: 900 / 1000 [ 90%]  (Sampling) 
## Chain 3 Iteration: 900 / 1000 [ 90%]  (Sampling) 
## Chain 1 Iteration: 900 / 1000 [ 90%]  (Sampling) 
## Chain 4 Iteration: 900 / 1000 [ 90%]  (Sampling) 
## Chain 2 Iteration: 1000 / 1000 [100%]  (Sampling) 
## Chain 2 finished in 132.1 seconds.
## Chain 3 Iteration: 1000 / 1000 [100%]  (Sampling) 
## Chain 3 finished in 134.9 seconds.
## Chain 1 Iteration: 1000 / 1000 [100%]  (Sampling) 
## Chain 1 finished in 135.5 seconds.
## Chain 4 Iteration: 1000 / 1000 [100%]  (Sampling) 
## Chain 4 finished in 138.5 seconds.
## 
## All 4 chains finished successfully.
## Mean chain execution time: 135.2 seconds.
## Total execution time: 138.8 seconds.
bgnbd_fixed_stanfit$summary()
## # A tibble: 9,433 × 10
##    variable        mean   median      sd     mad       q5      q95  rhat ess_b…¹
##    <chr>          <dbl>    <dbl>   <dbl>   <dbl>    <dbl>    <dbl> <dbl>   <dbl>
##  1 lp__        -8.32e+4 -8.32e+4 75.7    74.6    -8.33e+4 -8.30e+4 1.00     629.
##  2 lambda[1]    1.76e-1  1.70e-1  0.0535  0.0509  9.92e-2  2.71e-1 0.999   1891.
##  3 lambda[2]    1.33e-1  1.16e-1  0.0888  0.0725  2.95e-2  2.96e-1 1.00    1591.
##  4 lambda[3]    1.06e-1  9.30e-2  0.0663  0.0555  2.59e-2  2.33e-1 1.00    1891.
##  5 lambda[4]    6.95e-2  5.92e-2  0.0452  0.0369  1.68e-2  1.54e-1 0.999   1991.
##  6 lambda[5]    1.30e-1  7.27e-2  0.172   0.0817  4.97e-3  4.67e-1 1.00    1735.
##  7 lambda[6]    1.26e-1  5.83e-2  0.178   0.0666  4.35e-3  4.82e-1 1.00    1819.
##  8 lambda[7]    2.94e-1  2.81e-1  0.108   0.105   1.39e-1  4.85e-1 1.00    1759.
##  9 lambda[8]    1.30e-1  4.89e-2  0.206   0.0593  2.42e-3  5.39e-1 1.00    1587.
## 10 lambda[9]    1.60e-1  6.42e-2  0.225   0.0844  3.02e-3  6.22e-1 1.00    1134.
## # … with 9,423 more rows, 1 more variable: ess_tail <dbl>, and abbreviated
## #   variable name ¹​ess_bulk

We have some basic HMC-based validity statistics we can check.

bgnbd_fixed_stanfit$cmdstan_diagnose()
## Processing csv files: /home/rstudio/workshop/stan_models/fit_bgnbd_fixed-1.csvWarning: non-fatal error reading adaptation data
## , /home/rstudio/workshop/stan_models/fit_bgnbd_fixed-2.csvWarning: non-fatal error reading adaptation data
## , /home/rstudio/workshop/stan_models/fit_bgnbd_fixed-3.csvWarning: non-fatal error reading adaptation data
## , /home/rstudio/workshop/stan_models/fit_bgnbd_fixed-4.csvWarning: non-fatal error reading adaptation data
## 
## 
## Checking sampler transitions treedepth.
## Treedepth satisfactory for all transitions.
## 
## Checking sampler transitions for divergences.
## No divergent transitions found.
## 
## Checking E-BFMI - sampler transitions HMC potential energy.
## E-BFMI satisfactory.
## 
## Effective sample size satisfactory.
## 
## Split R-hat values satisfactory all parameters.
## 
## Processing complete, no problems detected.

2.2 Visual Diagnostics of the Sample Validity

Now that we have a sample from the posterior distribution we need to create a few different visualisations of the diagnostics.

sample_params <- c(
  "lambda[1]", "lambda[2]", "lambda[3]", "lambda[4]", "lambda[5]", "lambda[6]",
  "p[1]",      "p[2]",      "p[3]",      "p[4]",      "p[5]",      "p[6]"
  )

bgnbd_fixed_stanfit$draws(inc_warmup = FALSE) %>%
  mcmc_trace(pars = sample_params) +
  expand_limits(y = 0) +
  labs(
    x = "Iteration",
    y = "Value",
    title = "Traceplot of Sample of Lambda and p Values"
    ) +
  theme(axis.text.x = element_text(size = 10))

A common MCMC diagnostic is \(\hat{R}\) - which is a measure of the ‘similarity’ of the chains.

bgnbd_fixed_stanfit %>%
  rhat(pars = c("lambda", "p")) %>%
  mcmc_rhat() +
    ggtitle("Plot of Parameter R-hat Values")

Related to this quantity is the concept of effective sample size, \(N_{eff}\), an estimate of the size of the sample from a statistical information point of view.

bgnbd_fixed_stanfit %>%
  neff_ratio(pars = c("lambda", "p")) %>%
  mcmc_neff() +
    ggtitle("Plot of Parameter Effective Sample Sizes")

Finally, we also want to look at autocorrelation in the chains for each parameter.

bgnbd_fixed_stanfit$draws() %>%
  mcmc_acf(pars = sample_params) +
    ggtitle("Autocorrelation Plot of Sample Values")

2.3 Validate the Hierarchical Lambda-Mean Model

We now want to validate the outputs of this model by looking at the posteriors for both \(\lambda\), \(\mu\) and p_alive, and then running our simulations using these outputs.

bgnbd_fixed_validation_tbl <- bgnbd_fixed_stanfit %>%
  recover_types(btyd_fitdata_tbl) %>%
  spread_draws(lambda[customer_id], p[customer_id]) %>%
  ungroup() %>%
  select(
    customer_id, draw_id = .draw, post_lambda = lambda, post_p = p
    )

bgnbd_fixed_validation_tbl %>% glimpse()
## Rows: 9,432,000
## Columns: 4
## $ customer_id <chr> "12346", "12346", "12346", "12346", "12346", "12346", "123…
## $ draw_id     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ post_lambda <dbl> 0.243379, 0.134271, 0.390812, 0.175006, 0.252511, 0.140594…
## $ post_p      <dbl> 0.16930600, 0.01793380, 0.25364600, 0.02760890, 0.13716200…

Having constructed our simulations inputs, we now generate our simulations.

precompute_dir <- "precompute/bgnbd_fixed"

precomputed_tbl <- dir_ls(precompute_dir) %>%
  enframe(name = NULL, value = "sim_file") %>%
  mutate(sim_file = sim_file %>% as.character())


bgnbd_fixed_validsims_lookup_tbl <- bgnbd_fixed_validation_tbl %>%
  group_nest(customer_id, .key = "cust_params") %>%
  mutate(
    sim_file = glue(
      "{precompute_dir}/sims_bgnbd_fixed_{customer_id}.rds"
      )
    )
    

exec_tbl <-  bgnbd_fixed_validsims_lookup_tbl %>%
  anti_join(precomputed_tbl, by = "sim_file")


if(exec_tbl %>% nrow() > 0) {
  exec_tbl %>%
    mutate(
      calc_file = future_map2_lgl(
        sim_file, cust_params,
        run_bgnbd_simulations_chunk,
        start_dttm = as.POSIXct("2011-04-01"),
        end_dttm   = as.POSIXct("2011-12-10"),
  
        .options = furrr_options(
          globals  = c(
            "calculate_event_times", "rgamma_mucv", "gamma_mucv2shaperate",
            "generate_bgnbd_validation_transactions"
            ),
          packages   = c("tidyverse", "fs"),
          scheduling = FALSE,
          seed       = 4202
          ),
        .progress = TRUE
        )
      )
}

exec_tbl %>% glimpse()
## Rows: 0
## Columns: 3
## $ customer_id <chr> 
## $ cust_params <list<tibble[,3]>> list()
## $ sim_file    <glue>
bgnbd_fixed_validsims_lookup_tbl %>% glimpse()
## Rows: 4,716
## Columns: 3
## $ customer_id <chr> "12346", "12347", "12348", "12349", "12350", "12351", "123…
## $ cust_params <list<tibble[,3]>> [<tbl_df[2000 x 3]>], [<tbl_df[2000 x 3]>], […
## $ sim_file    <glue> "precompute/bgnbd_fixed/sims_bgnbd_fixed_12346.rds", "pre…

We now load all the simulations into a file.

bgnbd_fixed_validsims_tbl <- bgnbd_fixed_validsims_lookup_tbl %>%
  mutate(
    data = map(sim_file, ~ .x %>% read_rds() %>% select(draw_id, sim_tnx_count, sim_tnx_last))
    ) %>%
  select(customer_id, sim_file, data) %>%
  unnest(data)

bgnbd_fixed_validsims_tbl %>% glimpse()
## Rows: 9,432,000
## Columns: 5
## $ customer_id   <chr> "12346", "12346", "12346", "12346", "12346", "12346", "1…
## $ sim_file      <glue> "precompute/bgnbd_fixed/sims_bgnbd_fixed_12346.rds", "p…
## $ draw_id       <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
## $ sim_tnx_count <int> 4, 5, 0, 4, 5, 2, 7, 1, 9, 7, 0, 6, 4, 5, 8, 7, 3, 0, 7,…
## $ sim_tnx_last  <dttm> 2011-05-30 18:44:35, 2011-10-14 23:48:52, NA, 2011-11-2…

We also want to look at how our simulated data compares to the transaction data that occurred after Apr 1 2011.

tnx_data_tbl <- btyd_obs_stats_tbl %>% 
  semi_join(bgnbd_fixed_validsims_tbl, by = "customer_id")

obs_customer_count  <- tnx_data_tbl %>% nrow()
obs_total_tnx_count <- tnx_data_tbl %>% pull(tnx_count) %>% sum()

bgnbd_fixed_tnx_simsumm_tbl <- bgnbd_fixed_validsims_tbl %>%
  group_by(draw_id) %>%
  summarise(
    .groups = "drop",
    
    sim_customer_count  = length(sim_tnx_count[sim_tnx_count > 0]),
    sim_total_tnx_count = sum(sim_tnx_count)
    )


ggplot(bgnbd_fixed_tnx_simsumm_tbl) +
  geom_histogram(aes(x = sim_customer_count), binwidth = 10) +
  geom_vline(aes(xintercept = obs_customer_count), colour = "red") +
  labs(
    x = "Simulated Customers With Transactions",
    y = "Frequency",
    title = "Histogram of Count of Customers Transacted",
    subtitle = "Observed Count in Red"
    )

ggplot(bgnbd_fixed_tnx_simsumm_tbl) +
  geom_histogram(aes(x = sim_total_tnx_count), binwidth = 50) +
  geom_vline(aes(xintercept = obs_total_tnx_count), colour = "red") +
  labs(
    x = "Simulated Transaction Count",
    y = "Frequency",
    title = "Histogram of Count of Total Transaction Count",
    subtitle = "Observed Count in Red"
    )

So we have gone from a strong under-estimation bias to a strong over-estimation bias, but the model is fitting much faster, so working on this.

3 Fit Hierarchical Mean BG/NBD Model

## functions {
##   #include util_functions.stan
## }
## 
## data {
##   int<lower=1> n;           // number of customers
## 
##   vector<lower=0>[n] t_x;   // time to most recent purchase
##   vector<lower=0>[n] T_cal; // total observation time
##   vector<lower=0>[n] x;     // number of purchases observed
## 
##   real          lambda_mn_p1;  // lambda mean prior p1
##   real<lower=0> lambda_mn_p2;  // lambda mean prior p2
## 
##   real<lower=0> lambda_cv;
## 
##   real<lower=0> p_mn_mu;       // p mean prior p1
##   real<lower=0> p_mn_k;        // p mean prior p2
## 
##   real<lower=0> p_k;
## }
## 
## 
## transformed data {
##   real<lower=0> r = 1 / (lambda_cv * lambda_cv);
## 
##   real<lower=0> p_mn_a = p_mn_k * p_mn_mu;
##   real<lower=0> p_mn_b = p_mn_k * (1 - p_mn_mu);
## }
## 
## parameters {
##   real<lower=0>              lambda_mn;
##   real<lower=0,upper=1>      p_mn;
## 
##   vector<lower=0>[n]         lambda;  // purchase rate
##   vector<lower=0,upper=1>[n] p;       // dropout probabilit
## }
## 
## 
## transformed parameters {
##   real<lower=0> alpha = 1 / (lambda_cv * lambda_cv * lambda_mn);
## 
##   real<lower=0> p_a   = p_k * p_mn;
##   real<lower=0> p_b   = p_k * (1 - p_mn);
## }
## 
## 
## model {
##   // set hyper-priors
##   lambda_mn ~ lognormal(lambda_mn_p1, lambda_mn_p2);
## 
##   p_mn      ~ beta     (p_mn_a, p_mn_b);
## 
##   // setting priors
##   lambda ~ gamma(r,   alpha);
##   p      ~ beta (p_a, p_b);
## 
##   target += calculate_bgnbd_loglik(n, lambda, p, x, t_x, T_cal);
## }

We now compile this model using CmdStanR.

bgnbd_hier_means_stanmodel <- cmdstan_model(
  "stan_code/bgnbd_hier_means.stan",
  include_paths =   stan_codedir,
  pedantic      =           TRUE,
  dir           =  stan_modeldir
)

3.1 Fit the Model

We then use this compiled model with our data to produce a fit of the data.

stan_modelname <- "bgnbd_hier_means"
stanfit_prefix <- str_c("fit_", stan_modelname) 

stan_data_lst <- btyd_fitdata_tbl %>%
  select(customer_id, x, t_x, T_cal) %>%
  compose_data(
    lambda_mn_p1 = log(1) - 0.5 * (1.0)^2,
    lambda_mn_p2 = 1.0,

    lambda_cv    = 1.0,

    p_mn_mu      = 0.1,
    p_mn_k       = 10,

    p_k          = 10
  )

bgnbd_hier_means_stanfit <- bgnbd_hier_means_stanmodel$sample(
  data            =                stan_data_lst,
  chains          =                            4,
  iter_warmup     =                          500,
  iter_sampling   =                          500,
  seed            =                         4202,
  save_warmup     =                         TRUE,
  output_dir      =                stan_modeldir,
  output_basename =               stanfit_prefix,
)
## Running MCMC with 4 chains, at most 8 in parallel...
## 
## Chain 1 Iteration:   1 / 1000 [  0%]  (Warmup) 
## Chain 2 Iteration:   1 / 1000 [  0%]  (Warmup) 
## Chain 3 Iteration:   1 / 1000 [  0%]  (Warmup) 
## Chain 4 Iteration:   1 / 1000 [  0%]  (Warmup) 
## Chain 4 Iteration: 100 / 1000 [ 10%]  (Warmup) 
## Chain 2 Iteration: 100 / 1000 [ 10%]  (Warmup) 
## Chain 1 Iteration: 100 / 1000 [ 10%]  (Warmup) 
## Chain 3 Iteration: 100 / 1000 [ 10%]  (Warmup) 
## Chain 4 Iteration: 200 / 1000 [ 20%]  (Warmup) 
## Chain 2 Iteration: 200 / 1000 [ 20%]  (Warmup) 
## Chain 1 Iteration: 200 / 1000 [ 20%]  (Warmup) 
## Chain 3 Iteration: 200 / 1000 [ 20%]  (Warmup) 
## Chain 4 Iteration: 300 / 1000 [ 30%]  (Warmup) 
## Chain 2 Iteration: 300 / 1000 [ 30%]  (Warmup) 
## Chain 1 Iteration: 300 / 1000 [ 30%]  (Warmup) 
## Chain 3 Iteration: 300 / 1000 [ 30%]  (Warmup) 
## Chain 4 Iteration: 400 / 1000 [ 40%]  (Warmup) 
## Chain 2 Iteration: 400 / 1000 [ 40%]  (Warmup) 
## Chain 1 Iteration: 400 / 1000 [ 40%]  (Warmup) 
## Chain 3 Iteration: 400 / 1000 [ 40%]  (Warmup) 
## Chain 4 Iteration: 500 / 1000 [ 50%]  (Warmup) 
## Chain 4 Iteration: 501 / 1000 [ 50%]  (Sampling) 
## Chain 2 Iteration: 500 / 1000 [ 50%]  (Warmup) 
## Chain 2 Iteration: 501 / 1000 [ 50%]  (Sampling) 
## Chain 1 Iteration: 500 / 1000 [ 50%]  (Warmup) 
## Chain 1 Iteration: 501 / 1000 [ 50%]  (Sampling) 
## Chain 4 Iteration: 600 / 1000 [ 60%]  (Sampling) 
## Chain 3 Iteration: 500 / 1000 [ 50%]  (Warmup) 
## Chain 3 Iteration: 501 / 1000 [ 50%]  (Sampling) 
## Chain 2 Iteration: 600 / 1000 [ 60%]  (Sampling) 
## Chain 1 Iteration: 600 / 1000 [ 60%]  (Sampling) 
## Chain 4 Iteration: 700 / 1000 [ 70%]  (Sampling) 
## Chain 3 Iteration: 600 / 1000 [ 60%]  (Sampling) 
## Chain 2 Iteration: 700 / 1000 [ 70%]  (Sampling) 
## Chain 1 Iteration: 700 / 1000 [ 70%]  (Sampling) 
## Chain 4 Iteration: 800 / 1000 [ 80%]  (Sampling) 
## Chain 3 Iteration: 700 / 1000 [ 70%]  (Sampling) 
## Chain 2 Iteration: 800 / 1000 [ 80%]  (Sampling) 
## Chain 1 Iteration: 800 / 1000 [ 80%]  (Sampling) 
## Chain 4 Iteration: 900 / 1000 [ 90%]  (Sampling) 
## Chain 3 Iteration: 800 / 1000 [ 80%]  (Sampling) 
## Chain 2 Iteration: 900 / 1000 [ 90%]  (Sampling) 
## Chain 1 Iteration: 900 / 1000 [ 90%]  (Sampling) 
## Chain 4 Iteration: 1000 / 1000 [100%]  (Sampling) 
## Chain 4 finished in 222.8 seconds.
## Chain 3 Iteration: 900 / 1000 [ 90%]  (Sampling) 
## Chain 2 Iteration: 1000 / 1000 [100%]  (Sampling) 
## Chain 2 finished in 229.0 seconds.
## Chain 1 Iteration: 1000 / 1000 [100%]  (Sampling) 
## Chain 1 finished in 232.3 seconds.
## Chain 3 Iteration: 1000 / 1000 [100%]  (Sampling) 
## Chain 3 finished in 233.8 seconds.
## 
## All 4 chains finished successfully.
## Mean chain execution time: 229.5 seconds.
## Total execution time: 234.3 seconds.
bgnbd_hier_means_stanfit$summary()
## # A tibble: 9,438 × 10
##    variable        mean   median      sd     mad       q5      q95  rhat ess_b…¹
##    <chr>          <dbl>    <dbl>   <dbl>   <dbl>    <dbl>    <dbl> <dbl>   <dbl>
##  1 lp__        -6.41e+4 -6.41e+4 1.03e+2 9.99e+1 -6.42e+4 -6.39e+4 1.02    131. 
##  2 lambda_mn    1.21e-1  1.21e-1 2.57e-3 2.61e-3  1.17e-1  1.26e-1 1.01    567. 
##  3 p_mn         1.70e-1  1.70e-1 5.55e-3 5.57e-3  1.61e-1  1.79e-1 1.04     97.7
##  4 lambda[1]    1.66e-1  1.61e-1 4.86e-2 4.75e-2  9.64e-2  2.52e-1 0.999  2807. 
##  5 lambda[2]    1.16e-1  1.02e-1 7.01e-2 6.13e-2  3.01e-2  2.47e-1 1.00   1821. 
##  6 lambda[3]    9.46e-2  8.52e-2 5.50e-2 5.15e-2  2.59e-2  2.02e-1 1.00   2220. 
##  7 lambda[4]    6.82e-2  5.82e-2 4.49e-2 3.62e-2  1.66e-2  1.49e-1 1.00   3056. 
##  8 lambda[5]    8.03e-2  5.16e-2 8.97e-2 5.53e-2  3.29e-3  2.53e-1 1.00   2252. 
##  9 lambda[6]    7.37e-2  4.09e-2 9.43e-2 4.50e-2  2.86e-3  2.69e-1 1.00   2399. 
## 10 lambda[7]    2.50e-1  2.37e-1 9.86e-2 9.21e-2  1.12e-1  4.36e-1 1.00   3406. 
## # … with 9,428 more rows, 1 more variable: ess_tail <dbl>, and abbreviated
## #   variable name ¹​ess_bulk

We have some basic HMC-based validity statistics we can check.

bgnbd_hier_means_stanfit$cmdstan_diagnose()
## Processing csv files: /home/rstudio/workshop/stan_models/fit_bgnbd_hier_means-1.csvWarning: non-fatal error reading adaptation data
## , /home/rstudio/workshop/stan_models/fit_bgnbd_hier_means-2.csvWarning: non-fatal error reading adaptation data
## , /home/rstudio/workshop/stan_models/fit_bgnbd_hier_means-3.csvWarning: non-fatal error reading adaptation data
## , /home/rstudio/workshop/stan_models/fit_bgnbd_hier_means-4.csvWarning: non-fatal error reading adaptation data
## 
## 
## Checking sampler transitions treedepth.
## Treedepth satisfactory for all transitions.
## 
## Checking sampler transitions for divergences.
## No divergent transitions found.
## 
## Checking E-BFMI - sampler transitions HMC potential energy.
## E-BFMI satisfactory.
## 
## Effective sample size satisfactory.
## 
## Split R-hat values satisfactory all parameters.
## 
## Processing complete, no problems detected.

3.2 Visual Diagnostics of the Sample Validity

Now that we have a sample from the posterior distribution we need to create a few different visualisations of the diagnostics.

sample_params <- c(
  "lambda_mn", "p_mn",
  "lambda[1]", "lambda[2]", "lambda[3]", "lambda[4]", "lambda[5]",
  "p[1]",      "p[2]",      "p[3]",      "p[4]",      "p[5]"
  )

bgnbd_hier_means_stanfit$draws(inc_warmup = FALSE) %>%
  mcmc_trace(pars = sample_params) +
  expand_limits(y = 0) +
  labs(
    x = "Iteration",
    y = "Value",
    title = "Traceplot of Sample of Lambda and p Values"
    ) +
  theme(axis.text.x = element_text(size = 10))

A common MCMC diagnostic is \(\hat{R}\) - which is a measure of the ‘similarity’ of the chains.

bgnbd_hier_means_stanfit %>%
  rhat(pars = c("lambda_mn", "p_mn", "lambda", "p")) %>%
  mcmc_rhat() +
    ggtitle("Plot of Parameter R-hat Values")

Related to this quantity is the concept of effective sample size, \(N_{eff}\), an estimate of the size of the sample from a statistical information point of view.

bgnbd_hier_means_stanfit %>%
  neff_ratio(pars = c("lambda_mn", "p_mn", "lambda", "p")) %>%
  mcmc_neff() +
    ggtitle("Plot of Parameter Effective Sample Sizes")

3.3 Validate the Hierarchical Lambda-Mean Model

We now want to validate the outputs of this model by looking at the posteriors for both \(\lambda\), \(\mu\) and p_alive, and then running our simulations using these outputs.

bgnbd_hier_means_validation_tbl <- bgnbd_hier_means_stanfit %>%
  recover_types(btyd_fitdata_tbl) %>%
  spread_draws(lambda[customer_id], p[customer_id]) %>%
  ungroup() %>%
  select(
    customer_id, draw_id = .draw, post_lambda = lambda, post_p = p
    )

bgnbd_hier_means_validation_tbl %>% glimpse()
## Rows: 9,432,000
## Columns: 4
## $ customer_id <chr> "12346", "12346", "12346", "12346", "12346", "12346", "123…
## $ draw_id     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
## $ post_lambda <dbl> 0.0953844, 0.0853009, 0.0811996, 0.2378080, 0.1220780, 0.2…
## $ post_p      <dbl> 0.0948068, 0.0799318, 0.1330810, 0.1203820, 0.0702531, 0.0…

Having constructed our simulations inputs, we now generate our simulations.

precompute_dir <- "precompute/bgnbd_hier_means"

precomputed_tbl <- dir_ls(precompute_dir) %>%
  enframe(name = NULL, value = "sim_file") %>%
  mutate(sim_file = sim_file %>% as.character())


bgnbd_hier_means_validsims_lookup_tbl <- bgnbd_hier_means_validation_tbl %>%
  group_nest(customer_id, .key = "cust_params") %>%
  mutate(
    sim_file = glue(
      "{precompute_dir}/sims_bgnbd_hier_means_{customer_id}.rds"
      )
    )
    

exec_tbl <-  bgnbd_hier_means_validsims_lookup_tbl %>%
  anti_join(precomputed_tbl, by = "sim_file")


if(exec_tbl %>% nrow() > 0) {
  exec_tbl %>%
    mutate(
      calc_file = future_map2_lgl(
        sim_file, cust_params,
        run_bgnbd_simulations_chunk,
        start_dttm = as.POSIXct("2011-04-01"),
        end_dttm   = as.POSIXct("2011-12-10"),
  
        .options = furrr_options(
          globals  = c(
            "calculate_event_times", "rgamma_mucv", "gamma_mucv2shaperate",
            "generate_bgnbd_validation_transactions"
            ),
          packages   = c("tidyverse", "fs"),
          scheduling = FALSE,
          seed       = 4202
          ),
        .progress = TRUE
        )
      )
}

exec_tbl %>% glimpse()
## Rows: 0
## Columns: 3
## $ customer_id <chr> 
## $ cust_params <list<tibble[,3]>> list()
## $ sim_file    <glue>
bgnbd_hier_means_validsims_lookup_tbl %>% glimpse()
## Rows: 4,716
## Columns: 3
## $ customer_id <chr> "12346", "12347", "12348", "12349", "12350", "12351", "123…
## $ cust_params <list<tibble[,3]>> [<tbl_df[2000 x 3]>], [<tbl_df[2000 x 3]>], […
## $ sim_file    <glue> "precompute/bgnbd_hier_means/sims_bgnbd_hier_means_12346.…

We now load all the simulations into a file.

bgnbd_hier_means_validsims_tbl <- bgnbd_hier_means_validsims_lookup_tbl %>%
  mutate(
    data = map(sim_file, ~ .x %>% read_rds() %>% select(draw_id, sim_tnx_count, sim_tnx_last))
    ) %>%
  select(customer_id, sim_file, data) %>%
  unnest(data)

bgnbd_hier_means_validsims_tbl %>% glimpse()
## Rows: 9,432,000
## Columns: 5
## $ customer_id   <chr> "12346", "12346", "12346", "12346", "12346", "12346", "1…
## $ sim_file      <glue> "precompute/bgnbd_hier_means/sims_bgnbd_hier_means_1234…
## $ draw_id       <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
## $ sim_tnx_count <int> 4, 3, 1, 3, 1, 2, 1, 3, 9, 1, 2, 12, 0, 7, 9, 3, 7, 6, 3…
## $ sim_tnx_last  <dttm> 2011-08-31 12:49:57, 2011-11-09 15:56:17, 2011-05-19 05…

We also want to look at how our simulated data compares to the transaction data that occurred after Apr 1 2011.

tnx_data_tbl <- btyd_obs_stats_tbl %>% 
  semi_join(bgnbd_hier_means_validsims_tbl, by = "customer_id")

obs_customer_count  <- tnx_data_tbl %>% nrow()
obs_total_tnx_count <- tnx_data_tbl %>% pull(tnx_count) %>% sum()

bgnbd_hier_means_tnx_simsumm_tbl <- bgnbd_hier_means_validsims_tbl %>%
  group_by(draw_id) %>%
  summarise(
    .groups = "drop",
    
    sim_customer_count  = length(sim_tnx_count[sim_tnx_count > 0]),
    sim_total_tnx_count = sum(sim_tnx_count)
    )


ggplot(bgnbd_hier_means_tnx_simsumm_tbl) +
  geom_histogram(aes(x = sim_customer_count), binwidth = 10) +
  geom_vline(aes(xintercept = obs_customer_count), colour = "red") +
  labs(
    x = "Simulated Customers With Transactions",
    y = "Frequency",
    title = "Histogram of Count of Customers Transacted",
    subtitle = "Observed Count in Red"
    )

ggplot(bgnbd_hier_means_tnx_simsumm_tbl) +
  geom_histogram(aes(x = sim_total_tnx_count), binwidth = 50) +
  geom_vline(aes(xintercept = obs_total_tnx_count), colour = "red") +
  labs(
    x = "Simulated Transaction Count",
    y = "Frequency",
    title = "Histogram of Count of Total Transaction Count",
    subtitle = "Observed Count in Red"
    )

4 R Environment

options(width = 120L)
sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.2.1 (2022-06-23)
##  os       Ubuntu 20.04.5 LTS
##  system   x86_64, linux-gnu
##  ui       X11
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       Etc/UTC
##  date     2022-11-07
##  pandoc   2.19.2 @ /usr/local/bin/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
##  package              * version    date (UTC) lib source
##  abind                  1.4-5      2016-07-21 [1] RSPM (R 4.2.0)
##  arrayhelpers           1.1-0      2020-02-04 [1] RSPM (R 4.2.0)
##  assertthat             0.2.1      2019-03-21 [1] RSPM (R 4.2.0)
##  backports              1.4.1      2021-12-13 [1] RSPM (R 4.2.0)
##  base64enc              0.1-3      2015-07-28 [1] RSPM (R 4.2.0)
##  bayesplot            * 1.9.0      2022-03-10 [1] RSPM (R 4.2.0)
##  bit                    4.0.4      2020-08-04 [1] RSPM (R 4.2.0)
##  bit64                  4.0.5      2020-08-30 [1] RSPM (R 4.2.0)
##  bookdown               0.29       2022-09-12 [1] RSPM (R 4.2.0)
##  boot                   1.3-28     2021-05-03 [2] CRAN (R 4.2.1)
##  bridgesampling         1.1-2      2021-04-16 [1] RSPM (R 4.2.0)
##  brms                 * 2.18.0     2022-11-01 [1] Github (paul-buerkner/brms@28f778d)
##  Brobdingnag            1.2-9      2022-10-19 [1] RSPM (R 4.2.0)
##  broom                  1.0.1      2022-08-29 [1] RSPM (R 4.2.0)
##  bslib                  0.4.0      2022-07-16 [1] RSPM (R 4.2.0)
##  cachem                 1.0.6      2021-08-19 [1] RSPM (R 4.2.0)
##  callr                  3.7.2      2022-08-22 [1] RSPM (R 4.2.0)
##  cellranger             1.1.0      2016-07-27 [1] RSPM (R 4.2.0)
##  checkmate              2.1.0      2022-04-21 [1] RSPM (R 4.2.0)
##  cli                    3.4.1      2022-09-23 [1] RSPM (R 4.2.0)
##  cmdstanr             * 0.5.3      2022-11-01 [1] Github (stan-dev/cmdstanr@22b391e)
##  coda                   0.19-4     2020-09-30 [1] RSPM (R 4.2.0)
##  codetools              0.2-18     2020-11-04 [2] CRAN (R 4.2.1)
##  colorspace             2.0-3      2022-02-21 [1] RSPM (R 4.2.0)
##  colourpicker           1.1.1      2021-10-04 [1] RSPM (R 4.2.0)
##  conflicted           * 1.1.0      2021-11-26 [1] RSPM (R 4.2.0)
##  cowplot              * 1.1.1      2020-12-30 [1] RSPM (R 4.2.0)
##  crayon                 1.5.2      2022-09-29 [1] RSPM (R 4.2.0)
##  crosstalk              1.2.0      2021-11-04 [1] RSPM (R 4.2.0)
##  curl                   4.3.3      2022-10-06 [1] RSPM (R 4.2.0)
##  DBI                    1.1.3      2022-06-18 [1] RSPM (R 4.2.0)
##  dbplyr                 2.2.1      2022-06-27 [1] RSPM (R 4.2.0)
##  digest                 0.6.30     2022-10-18 [1] RSPM (R 4.2.0)
##  directlabels         * 2021.1.13  2021-01-16 [1] RSPM (R 4.2.0)
##  distributional         0.3.1      2022-09-02 [1] RSPM (R 4.2.0)
##  dplyr                * 1.0.10     2022-09-01 [1] RSPM (R 4.2.0)
##  DT                     0.26       2022-10-19 [1] RSPM (R 4.2.0)
##  dygraphs               1.1.1.6    2018-07-11 [1] RSPM (R 4.2.0)
##  ellipsis               0.3.2      2021-04-29 [1] RSPM (R 4.2.0)
##  evaluate               0.17       2022-10-07 [1] RSPM (R 4.2.0)
##  fansi                  1.0.3      2022-03-24 [1] RSPM (R 4.2.0)
##  farver                 2.1.1      2022-07-06 [1] RSPM (R 4.2.0)
##  fastmap                1.1.0      2021-01-25 [1] RSPM (R 4.2.0)
##  forcats              * 0.5.2      2022-08-19 [1] RSPM (R 4.2.0)
##  fs                   * 1.5.2      2021-12-08 [1] RSPM (R 4.2.0)
##  furrr                * 0.3.1      2022-08-15 [1] RSPM (R 4.2.0)
##  future               * 1.28.0     2022-09-02 [1] RSPM (R 4.2.0)
##  gamm4                  0.2-6      2020-04-03 [1] RSPM (R 4.2.0)
##  gargle                 1.2.1      2022-09-08 [1] RSPM (R 4.2.0)
##  generics               0.1.3      2022-07-05 [1] RSPM (R 4.2.0)
##  ggdist                 3.2.0      2022-07-19 [1] RSPM (R 4.2.0)
##  ggplot2              * 3.3.6      2022-05-03 [1] RSPM (R 4.2.0)
##  ggridges               0.5.4      2022-09-26 [1] RSPM (R 4.2.0)
##  globals                0.16.1     2022-08-28 [1] RSPM (R 4.2.0)
##  glue                 * 1.6.2      2022-02-24 [1] RSPM (R 4.2.0)
##  googledrive            2.0.0      2021-07-08 [1] RSPM (R 4.2.0)
##  googlesheets4          1.0.1      2022-08-13 [1] RSPM (R 4.2.0)
##  gridExtra              2.3        2017-09-09 [1] RSPM (R 4.2.0)
##  gtable                 0.3.1      2022-09-01 [1] RSPM (R 4.2.0)
##  gtools                 3.9.3      2022-07-11 [1] RSPM (R 4.2.0)
##  haven                  2.5.1      2022-08-22 [1] RSPM (R 4.2.0)
##  highr                  0.9        2021-04-16 [1] RSPM (R 4.2.0)
##  hms                    1.1.2      2022-08-19 [1] RSPM (R 4.2.0)
##  htmltools              0.5.3      2022-07-18 [1] RSPM (R 4.2.0)
##  htmlwidgets            1.5.4      2021-09-08 [1] RSPM (R 4.2.0)
##  httpuv                 1.6.6      2022-09-08 [1] RSPM (R 4.2.0)
##  httr                   1.4.4      2022-08-17 [1] RSPM (R 4.2.0)
##  igraph                 1.3.5      2022-09-22 [1] RSPM (R 4.2.0)
##  inline                 0.3.19     2021-05-31 [1] RSPM (R 4.2.0)
##  jquerylib              0.1.4      2021-04-26 [1] RSPM (R 4.2.0)
##  jsonlite               1.8.3      2022-10-21 [1] RSPM (R 4.2.0)
##  knitr                  1.40       2022-08-24 [1] RSPM (R 4.2.0)
##  labeling               0.4.2      2020-10-20 [1] RSPM (R 4.2.0)
##  later                  1.3.0      2021-08-18 [1] RSPM (R 4.2.0)
##  lattice                0.20-45    2021-09-22 [2] CRAN (R 4.2.1)
##  lifecycle              1.0.3      2022-10-07 [1] RSPM (R 4.2.0)
##  listenv                0.8.0      2019-12-05 [1] RSPM (R 4.2.0)
##  lme4                   1.1-30     2022-07-08 [1] RSPM (R 4.2.0)
##  loo                    2.5.1      2022-03-24 [1] RSPM (R 4.2.0)
##  lubridate            * 1.8.0      2021-10-07 [1] RSPM (R 4.2.0)
##  magrittr             * 2.0.3      2022-03-30 [1] RSPM (R 4.2.0)
##  markdown               1.2        2022-10-19 [1] RSPM (R 4.2.0)
##  MASS                   7.3-57     2022-04-22 [2] CRAN (R 4.2.1)
##  Matrix                 1.5-1      2022-09-13 [1] RSPM (R 4.2.0)
##  matrixStats            0.62.0     2022-04-19 [1] RSPM (R 4.2.0)
##  memoise                2.0.1      2021-11-26 [1] RSPM (R 4.2.0)
##  mgcv                   1.8-40     2022-03-29 [2] CRAN (R 4.2.1)
##  mime                   0.12       2021-09-28 [1] RSPM (R 4.2.0)
##  miniUI                 0.1.1.1    2018-05-18 [1] RSPM (R 4.2.0)
##  minqa                  1.2.5      2022-10-19 [1] RSPM (R 4.2.0)
##  modelr                 0.1.9      2022-08-19 [1] RSPM (R 4.2.0)
##  munsell                0.5.0      2018-06-12 [1] RSPM (R 4.2.0)
##  mvtnorm                1.1-3      2021-10-08 [1] RSPM (R 4.2.0)
##  nlme                   3.1-157    2022-03-25 [2] CRAN (R 4.2.1)
##  nloptr                 2.0.3      2022-05-26 [1] RSPM (R 4.2.0)
##  parallelly             1.32.1     2022-07-21 [1] RSPM (R 4.2.0)
##  PerformanceAnalytics * 2.0.4      2020-02-06 [1] RSPM (R 4.2.0)
##  pillar                 1.8.1      2022-08-19 [1] RSPM (R 4.2.0)
##  pkgbuild               1.3.1      2021-12-20 [1] RSPM (R 4.2.0)
##  pkgconfig              2.0.3      2019-09-22 [1] RSPM (R 4.2.0)
##  plyr                   1.8.7      2022-03-24 [1] RSPM (R 4.2.0)
##  posterior            * 1.3.1      2022-09-06 [1] RSPM (R 4.2.0)
##  prettyunits            1.1.1      2020-01-24 [1] RSPM (R 4.2.0)
##  processx               3.8.0      2022-10-26 [1] RSPM (R 4.2.0)
##  projpred               2.2.1      2022-09-20 [1] RSPM (R 4.2.0)
##  promises               1.2.0.1    2021-02-11 [1] RSPM (R 4.2.0)
##  ps                     1.7.2      2022-10-26 [1] RSPM (R 4.2.0)
##  purrr                * 0.3.5      2022-10-06 [1] RSPM (R 4.2.0)
##  quadprog               1.5-8      2019-11-20 [1] RSPM (R 4.2.0)
##  Quandl                 2.11.0     2021-08-11 [1] RSPM (R 4.2.0)
##  quantmod             * 0.4.20     2022-04-29 [1] RSPM (R 4.2.0)
##  R6                     2.5.1      2021-08-19 [1] RSPM (R 4.2.0)
##  Rcpp                 * 1.0.9      2022-07-08 [1] RSPM (R 4.2.0)
##  RcppParallel           5.1.5      2022-01-05 [1] RSPM (R 4.2.0)
##  readr                * 2.1.3      2022-10-01 [1] RSPM (R 4.2.0)
##  readxl                 1.4.1      2022-08-17 [1] RSPM (R 4.2.0)
##  reprex                 2.0.2      2022-08-17 [1] RSPM (R 4.2.0)
##  reshape2               1.4.4      2020-04-09 [1] RSPM (R 4.2.0)
##  rlang                * 1.0.6      2022-09-24 [1] RSPM (R 4.2.0)
##  rmarkdown              2.17       2022-10-07 [1] RSPM (R 4.2.0)
##  rmdformats             1.0.4      2022-05-17 [1] RSPM (R 4.2.0)
##  rstan                  2.26.13    2022-11-01 [1] local
##  rstantools             2.2.0      2022-04-08 [1] RSPM (R 4.2.0)
##  rvest                  1.0.3      2022-08-19 [1] RSPM (R 4.2.0)
##  sass                   0.4.2      2022-07-16 [1] RSPM (R 4.2.0)
##  scales               * 1.2.1      2022-08-20 [1] RSPM (R 4.2.0)
##  sessioninfo            1.2.2      2021-12-06 [1] RSPM (R 4.2.0)
##  shiny                  1.7.3      2022-10-25 [1] RSPM (R 4.2.0)
##  shinyjs                2.1.0      2021-12-23 [1] RSPM (R 4.2.0)
##  shinystan              2.6.0      2022-03-03 [1] RSPM (R 4.2.0)
##  shinythemes            1.2.0      2021-01-25 [1] RSPM (R 4.2.0)
##  StanHeaders            2.26.13    2022-11-01 [1] local
##  stringi                1.7.8      2022-07-11 [1] RSPM (R 4.2.0)
##  stringr              * 1.4.1      2022-08-20 [1] RSPM (R 4.2.0)
##  svUnit                 1.0.6      2021-04-19 [1] RSPM (R 4.2.0)
##  tensorA                0.36.2     2020-11-19 [1] RSPM (R 4.2.0)
##  threejs                0.3.3      2020-01-21 [1] RSPM (R 4.2.0)
##  tibble               * 3.1.8      2022-07-22 [1] RSPM (R 4.2.0)
##  tidybayes            * 3.0.2.9000 2022-11-01 [1] Github (mjskay/tidybayes@1efbdef)
##  tidyquant            * 1.0.5      2022-09-08 [1] RSPM (R 4.2.0)
##  tidyr                * 1.2.1      2022-09-08 [1] RSPM (R 4.2.0)
##  tidyselect             1.2.0      2022-10-10 [1] RSPM (R 4.2.0)
##  tidyverse            * 1.3.2      2022-07-18 [1] RSPM (R 4.2.0)
##  TTR                  * 0.24.3     2021-12-12 [1] RSPM (R 4.2.0)
##  tzdb                   0.3.0      2022-03-28 [1] RSPM (R 4.2.0)
##  utf8                   1.2.2      2021-07-24 [1] RSPM (R 4.2.0)
##  V8                     4.2.1      2022-08-07 [1] RSPM (R 4.2.0)
##  vctrs                  0.5.0      2022-10-22 [1] RSPM (R 4.2.0)
##  vroom                  1.6.0      2022-09-30 [1] RSPM (R 4.2.0)
##  withr                  2.5.0      2022-03-03 [1] RSPM (R 4.2.0)
##  xfun                   0.34       2022-10-18 [1] RSPM (R 4.2.0)
##  xml2                   1.3.3      2021-11-30 [1] RSPM (R 4.2.0)
##  xtable                 1.8-4      2019-04-21 [1] RSPM (R 4.2.0)
##  xts                  * 0.12.2     2022-10-16 [1] RSPM (R 4.2.0)
##  yaml                   2.3.6      2022-10-18 [1] RSPM (R 4.2.0)
##  zoo                  * 1.8-11     2022-09-17 [1] RSPM (R 4.2.0)
## 
##  [1] /usr/local/lib/R/site-library
##  [2] /usr/local/lib/R/library
## 
## ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
options(width = 80L)